Geocoded individual transaction data
GIS shapefiles
Note that all longitudes and latitudes should be in WGS1984 format
#pragma nodebook off
#Use nodebook for better reproducibility https://github.com/uoa-eResearch/nodebook
%reload_ext nodebook.ipython
%nodebook disk phase3
# load libraries
import geopandas as gpd # vector data
import pandas as pd # tabular data, loading CSVs
import numpy as np # numeric data
from util import *
import matplotlib # plotting
import contextily as ctx # Used for contextual basemaps
from matplotlib_scalebar.scalebar import ScaleBar # scalebar for plot
import matplotlib.pyplot as plt # plotting
from tqdm.auto import tqdm # progress bars
tqdm.pandas()
import json
from shapely.geometry import Point, shape, LineString, MultiLineString, GeometryCollection, MultiPoint, Polygon # creating points
import requests # web requests
from pandarallel import pandarallel # parallel operations for speed
pandarallel.initialize(progress_bar=True)
plt.rcParams['figure.figsize'] = (20, 20)
INFO: Pandarallel will run on 16 workers. INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.
ls()
| name | filesize (MB) | last modified | |
|---|---|---|---|
| 0 | 2013-mb-dataset-Total-New-Zealand-Household.csv | 37.12 | 2014-06-04 10:56:30.000000 |
| 1 | 2013-mb-dataset-Total-New-Zealand-Individual-Part-1.csv | 31.66 | 2014-04-01 10:13:15.197000 |
| 2 | 2013_census_mean_income_by_AU2013.xlsx | 0.59 | 2021-09-07 06:21:24.000000 |
| 3 | 2018-census-electoral-population-meshblock-2020-data.csv | 5.60 | 2021-08-09 00:39:18.000000 |
| 4 | 2018_census_dwellings_by_SA2.xlsx | 0.43 | 2021-07-12 14:30:28.000000 |
| 5 | AC_Special_Housing_Area.zip | 0.29 | 2021-09-02 10:36:16.760000 |
| 6 | AucklandArea.gpkg | 0.09 | 2021-09-02 10:36:17.220000 |
| 7 | Geocoordinates_Direct_Transit_stops_AKL.xlsx | 0.01 | 2020-10-08 07:52:29.000000 |
| 8 | Individual_part1_totalNZ-wide_format_updated_16-7-20.csv | 36.58 | 2020-07-14 16:12:24.000000 |
| 9 | MASTER_UP_BaseZone_SHP.zip | 66.92 | 2021-07-19 02:23:51.137347 |
| 10 | Modified_Community_Boards_SHP.zip | 1.30 | 2021-07-19 02:16:07.650000 |
| 11 | area-unit-2013.gdb.zip | 14.21 | 2021-09-02 10:36:16.070000 |
| 12 | kx-nz-state-highway-on-ramps-off-ramps-SHP.zip | 0.14 | 2021-08-25 09:52:06.341291 |
| 13 | lds-nz-coastline-mean-high-water-FGDB.zip | 4.88 | 2021-08-31 16:25:16.250000 |
| 14 | lds-nz-coastlines-and-islands-polygons-topo-150k-FGDB.zip | 4.25 | 2021-08-05 15:57:49.020000 |
| 15 | lds-nz-primary-parcels-CLIPPED-4326.gpkg | 273.35 | 2021-09-02 10:37:32.410000 |
| 16 | lds-nz-primary-parcels-FGDB.zip | 79.03 | 2021-08-12 09:13:27.910000 |
| 17 | lds-nz-railway-centrelines-topo-150k-SHP.zip | 0.87 | 2021-08-31 10:01:46.315326 |
| 18 | lds-nz-road-centrelines-topo-150k-FGDB.zip | 36.06 | 2021-08-31 09:43:07.931704 |
| 19 | lds-nz-street-address-GPKG-CLIPPED.gpkg | 170.23 | 2021-09-02 10:37:19.490000 |
| 20 | lds-nz-street-address-GPKG.zip | 200.88 | 2021-09-02 10:37:26.070000 |
| 21 | meshblock-2013.gdb.zip | 106.57 | 2021-09-02 10:36:59.860000 |
| 22 | meshblock-2018-clipped-generalised.gdb.zip | 32.83 | 2021-09-02 10:36:26.010000 |
| 23 | nz-primary-parcels.gdb.zip | 55.00 | 2021-09-02 10:36:39.020000 |
| 24 | statsnzarea-unit-2013-FGDB.zip | 13.79 | 2021-08-31 16:56:17.150000 |
| 25 | statsnzmeshblock-higher-geographies-2018-generalised-FGDB.zip | 34.54 | 2021-08-05 14:53:54.350000 |
| 26 | statsnzpopulation-by-meshblock-2013-census-FGDB.zip | 82.11 | 2021-07-19 13:53:55.150631 |
| 27 | statsnzstatistical-area-2-higher-geographies-2018-clipped-generalis-FGDB.zip | 10.72 | 2021-08-30 17:12:55.591895 |
Total: 1300.0MB
ls("restricted")
| name | filesize (MB) | last modified | |
|---|---|---|---|
| 0 | QPIDs_Auckland_1990_2020.csv | 17.40 | 2021-07-07 13:09:39.000 |
| 1 | QPIDs_Auckland_1990_2020_augmented.csv | 240.48 | 2021-09-10 16:19:26.030 |
| 2 | road_intersections.pkl | 44.95 | 2021-09-10 15:59:13.120 |
Total: 303.0MB
df = pd.read_csv("restricted/QPIDs_Auckland_1990_2020.csv")
df = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.CL_Longitude, df.CL_Latitude), crs="EPSG:4326")
df = df.set_index("CL_QPID_output2")
df
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | |
|---|---|---|---|---|
| CL_QPID_output2 | ||||
| 75494 | 174.588941 | -36.186076 | 2020 | POINT (174.58894 -36.18608) |
| 75499 | 174.581811 | -36.200345 | 2020 | POINT (174.58181 -36.20034) |
| 75639 | 174.496590 | -36.228740 | 2020 | POINT (174.49659 -36.22874) |
| 75640 | 174.498361 | -36.228827 | 2020 | POINT (174.49836 -36.22883) |
| 75654 | 174.493736 | -36.229990 | 2020 | POINT (174.49374 -36.22999) |
| ... | ... | ... | ... | ... |
| 3160123 | 174.728112 | -36.408104 | 2018 | POINT (174.72811 -36.40810) |
| 3160124 | 174.728164 | -36.408113 | 2018 | POINT (174.72816 -36.40811) |
| 3160630 | 174.655992 | -36.509925 | 2018 | POINT (174.65599 -36.50992) |
| 3164546 | 174.537978 | -36.774834 | 2018 | POINT (174.53798 -36.77483) |
| 3166852 | 174.644137 | -36.854796 | 2018 | POINT (174.64414 -36.85480) |
383272 rows × 4 columns
sum(pd.isna(df.CL_Longitude))
7178
%%time
parcels = gpd.read_file('input/lds-nz-primary-parcels-FGDB.zip!nz-primary-parcels.gdb')
parcels = parcels.to_crs(df.crs)
parcels = parcels.set_index("id")
parcels["parcel_geometry"] = parcels.geometry
CPU times: user 41.6 s, sys: 1.97 s, total: 43.6 s Wall time: 43.8 s
parcels
| appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | geometry | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| id | |||||||||||
| 5266447 | None | None | Hydro | Primary | None | North Auckland | None | NaN | 1289690.0 | MULTIPOLYGON (((174.46092 -36.26249, 174.46083... | MULTIPOLYGON (((174.46092 -36.26249, 174.46083... |
| 4789727 | Part Lot 3 Allot 64 Section 1 SBRS OF Auckland | SO 663 | DCDB | Primary | None | North Auckland | None | NaN | 1.0 | MULTIPOLYGON (((174.77836 -36.85187, 174.77838... | MULTIPOLYGON (((174.77836 -36.85187, 174.77838... |
| 4810316 | Part Tidal Lands of Manukau Harbour Survey Off... | SO 67474 | DCDB | Primary | [Create] Revested in the Crown Sec 5 Foreshore... | North Auckland | None | 31600000.0 | 31342451.0 | MULTIPOLYGON (((174.68836 -37.01631, 174.68898... | MULTIPOLYGON (((174.68836 -37.01631, 174.68898... |
| 4817943 | Crown Land Survey Office Plan 58175 | SO 58175 | DCDB | Primary | [Create] Crown Land Reserved from Sale Sec 58 ... | North Auckland | None | NaN | 467490.0 | MULTIPOLYGON (((174.33091 -36.32797, 174.33059... | MULTIPOLYGON (((174.33091 -36.32797, 174.33059... |
| 4827816 | Lot 2 DP 165098 | DP 165098 | DCDB | Primary | None | North Auckland | NA99B/977 | 22979.0 | 22972.0 | MULTIPOLYGON (((174.73406 -36.69245, 174.73408... | MULTIPOLYGON (((174.73406 -36.69245, 174.73408... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7291940 | Lot 745 DP 433546 | DP 433546 | Fee Simple Title | Primary | None | North Auckland | 528968 | 247.0 | 247.0 | MULTIPOLYGON (((174.93787 -37.03903, 174.93763... | MULTIPOLYGON (((174.93787 -37.03903, 174.93763... |
| 7266269 | Lot 533 DP 427884 | DP 427884, LT 454543 | Fee Simple Title | Primary | None | North Auckland | 520875 | 336.0 | 336.0 | MULTIPOLYGON (((174.93990 -37.03814, 174.94015... | MULTIPOLYGON (((174.93990 -37.03814, 174.94015... |
| 8051540 | Lot 8 DP 533517 | DP 533517 | Fee Simple Title | Primary | None | North Auckland | 876934 | 185.0 | 185.0 | MULTIPOLYGON (((174.67269 -36.59236, 174.67269... | MULTIPOLYGON (((174.67269 -36.59236, 174.67269... |
| 7264065 | Lot 518 DP 429024 | DP 429024 | Fee Simple Title | Primary | None | North Auckland | 513803 | 313.0 | 313.0 | MULTIPOLYGON (((174.93753 -37.03880, 174.93747... | MULTIPOLYGON (((174.93753 -37.03880, 174.93747... |
| 7266268 | Lot 532 DP 427884 | DP 427884, LT 454543 | Fee Simple Title | Primary | None | North Auckland | 520874 | 331.0 | 331.0 | MULTIPOLYGON (((174.94009 -37.03794, 174.94015... | MULTIPOLYGON (((174.94009 -37.03794, 174.94015... |
537290 rows × 11 columns
%%time
df = gpd.sjoin(df, parcels, how="left")
CPU times: user 40 s, sys: 1.35 s, total: 41.4 s Wall time: 41.7 s
df.parcel_intent.value_counts()
DCDB 276092 Fee Simple Title 99917 Legalisation 79 Road 3 Vesting on Deposit for Local Purpose Reserve 2 Vesting on Deposit for Recreation Reserve (Territorial Authority) 1 Name: parcel_intent, dtype: int64
# Roads shouldn't be in this dataset - might have to give these 3 points a little nudge
sold_roads = df[df.parcel_intent == "Road"]
for i in range(len(sold_roads)):
sold_road = sold_roads.iloc[i:i+1]
display(sold_road)
ax = sold_road.parcel_geometry.to_crs(epsg=3857).plot(alpha=.5)
sold_road.parcel_geometry.centroid.to_crs(epsg=3857).plot(ax=ax, color="red")
sold_road.to_crs(epsg=3857).plot(ax=ax, color="green")
ctx.add_basemap(ax, source=ctx.providers.Esri.WorldImagery, zoom=21 if i == 0 else "auto")
ax.set_title("Red = parcel centroid, green = QPID latlong")
plt.show()
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||
| 258637 | 174.84187 | -36.903922 | 2020 | POINT (174.84187 -36.90392) | 7515185.0 | Section 1 SO 471988 | SO 471988, SO 509515 | Road | Primary | [Create] Land declared Road New Zealand Gazett... | North Auckland | None | 690.0 | 688.0 | MULTIPOLYGON (((174.84229 -36.90402, 174.84226... |
<string>:6: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||
| 1528194 | 174.642663 | -36.59097 | 2020 | POINT (174.64266 -36.59097) | 6605265.0 | None | SO 70497 | Road | Primary | [Create] Road New Zealand Gazette 2002 p 4425 | North Auckland | None | NaN | 31611.0 | MULTIPOLYGON (((174.63916 -36.58870, 174.63934... |
<string>:6: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||
| 1779952 | 174.749001 | -36.823758 | 2020 | POINT (174.74900 -36.82376) | 5222196.0 | None | None | Road | Primary | None | North Auckland | None | NaN | 3462.0 | MULTIPOLYGON (((174.74895 -36.82419, 174.74897... |
<string>:6: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
df.at[258637, "parcel_intent"] = "Glitch?"
print(f"{1e-4} degrees equates to {df.loc[[1528194]].to_crs(epsg=2193).distance(df.loc[[1528194]].translate(yoff=-1e-4).to_crs(epsg=2193)).iloc[0]} meters")
0.0001 degrees equates to 11.09551261419352 meters
problems = [1528194, 1779952]
display(df.loc[problems])
# Nudge point south a little bit
df.geometry[1528194] = df.loc[[1528194]].translate(yoff=-1e-4)
# Nudge point west a little bit
df.geometry[1779952] = df.loc[[1779952]].translate(xoff=-1e-4)
# Redo the join for these newly adjusted points
subset = df.loc[df.index.isin(problems), ["CL_Longitude", "CL_Latitude", "QPID_vintage", "geometry"]]
df.loc[problems] = gpd.sjoin(subset, parcels)
display(df.loc[problems])
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||
| 1528194 | 174.642663 | -36.590970 | 2020 | POINT (174.64266 -36.59097) | 6605265.0 | None | SO 70497 | Road | Primary | [Create] Road New Zealand Gazette 2002 p 4425 | North Auckland | None | NaN | 31611.0 | MULTIPOLYGON (((174.63916 -36.58870, 174.63934... |
| 1779952 | 174.749001 | -36.823758 | 2020 | POINT (174.74900 -36.82376) | 5222196.0 | None | None | Road | Primary | None | North Auckland | None | NaN | 3462.0 | MULTIPOLYGON (((174.74895 -36.82419, 174.74897... |
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||
| 1528194 | 174.642663 | -36.590970 | 2020 | POINT (174.64266 -36.59107) | 5201612.0 | Part Allot 74 PSH OF Waiwera | LT 549520, SO 1693/B | DCDB | Primary | None | North Auckland | NA1129/238 | 2289.0 | 2286.0 | MULTIPOLYGON (((174.64203 -36.59121, 174.64232... |
| 1779952 | 174.749001 | -36.823758 | 2020 | POINT (174.74890 -36.82376) | 4839993.0 | Lot 1 DP 154975 | DP 154975 | DCDB | Primary | None | North Auckland | NA125D/837, NA125D/838 | 811.0 | 760.0 | MULTIPOLYGON (((174.74859 -36.82361, 174.74901... |
df.parcel_intent.value_counts()
DCDB 276094 Fee Simple Title 99917 Legalisation 79 Vesting on Deposit for Local Purpose Reserve 2 Glitch? 1 Vesting on Deposit for Recreation Reserve (Territorial Authority) 1 Name: parcel_intent, dtype: int64
df = df.rename(columns={"titles": "LINZ_parcel_ID"})
df.index_right = df.index_right.astype('Int64')
df
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | LINZ_parcel_ID | survey_area | calc_area | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||
| 75494 | 174.588941 | -36.186076 | 2020 | POINT (174.58894 -36.18608) | 5128312 | Part Allot S25 PSH OF Arai | None | DCDB | Primary | None | North Auckland | NA589/73 | NaN | 2913.0 | MULTIPOLYGON (((174.58877 -36.18632, 174.58888... |
| 75499 | 174.581811 | -36.200345 | 2020 | POINT (174.58181 -36.20034) | 5128309 | Lot 2 DP 130303 | DP 130303 | DCDB | Primary | None | North Auckland | NA76B/662 | 9859.0 | 8620.0 | MULTIPOLYGON (((174.58154 -36.20156, 174.58161... |
| 75639 | 174.496590 | -36.228740 | 2020 | POINT (174.49659 -36.22874) | 4823770 | Part Otioro & Te Topuni A2A Block | ML 9928 | DCDB | Primary | None | North Auckland | NA1373/48 | 8296.0 | 7490.0 | MULTIPOLYGON (((174.49514 -36.22889, 174.49701... |
| 75640 | 174.498361 | -36.228827 | 2020 | POINT (174.49836 -36.22883) | 4697657 | Lot 1 DP 44316 | DP 44316 | DCDB | Primary | None | North Auckland | NA1373/47 | 3667.0 | 4152.0 | MULTIPOLYGON (((174.49863 -36.22877, 174.49862... |
| 75654 | 174.493736 | -36.229990 | 2020 | POINT (174.49374 -36.22999) | 5202126 | Lot 1 DP 52926 | DP 52926 | DCDB | Primary | None | North Auckland | NA8B/226 | 3331.0 | 3323.0 | MULTIPOLYGON (((174.49427 -36.23026, 174.49386... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3160123 | 174.728112 | -36.408104 | 2018 | POINT (174.72811 -36.40810) | 7974171 | Lot 82 DP 521864 | DP 521864 | Fee Simple Title | Primary | None | North Auckland | 826394 | 39.0 | 40.0 | MULTIPOLYGON (((174.72813 -36.40815, 174.72812... |
| 3160124 | 174.728164 | -36.408113 | 2018 | POINT (174.72816 -36.40811) | 7974172 | Lot 83 DP 521864 | DP 521864 | Fee Simple Title | Primary | None | North Auckland | 826395 | 39.0 | 40.0 | MULTIPOLYGON (((174.72813 -36.40815, 174.72814... |
| 3160630 | 174.655992 | -36.509925 | 2018 | POINT (174.65599 -36.50992) | 7440747 | Lot 2 DP 460961 | DP 460961 | Fee Simple Title | Primary | None | North Auckland | 605470 | 4500.0 | 4495.0 | MULTIPOLYGON (((174.65635 -36.51025, 174.65624... |
| 3164546 | 174.537978 | -36.774834 | 2018 | POINT (174.53798 -36.77483) | 7987560 | Lot 1 DP 533552 | DP 533552 | Fee Simple Title | Primary | None | North Auckland | 878889 | 62163.0 | 62143.0 | MULTIPOLYGON (((174.53978 -36.77485, 174.53974... |
| 3166852 | 174.644137 | -36.854796 | 2018 | POINT (174.64414 -36.85480) | 7799795 | Section 36 SO 498829 | SO 498829 | Fee Simple Title | Primary | [Create] Fee Simple Title New Zealand Gazette ... | North Auckland | 900731 | 1460.0 | 1459.0 | MULTIPOLYGON (((174.64432 -36.85494, 174.64428... |
383272 rows × 15 columns
df.LINZ_parcel_ID.sample(10)
CL_QPID_output2 1692231 NA90C/193 261242 NA122A/921 2296004 91598 1925785 NA118C/310, NA127D/138, NA127D/139 2528611 276306 1654512 NA6B/503 2104176 NA135C/208 2415360 172798 1829679 NA104D/107, NA105C/436, NA105C/437 2972627 716812 Name: LINZ_parcel_ID, dtype: object
b. Centroid longitude of parcel(s). LINZ_parcel_centroid_lon
c. Centroid latitude of parcel(s). LINZ_parcel_centroid_lat
df["LINZ_parcel_centroid_lon"] = df.parcel_geometry.centroid.x
df["LINZ_parcel_centroid_lat"] = df.parcel_geometry.centroid.y
<string>:1: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation. <string>:2: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
sample_parcels = parcels.cx[174.782:174.783, -36.870:-36.871]
ax = sample_parcels.to_crs(epsg=3857).plot(column="parcel_intent", legend=True, alpha=.5, categorical=True, edgecolor="black")
sample_parcels.centroid.to_crs(epsg=3857).plot(ax=ax, color="red")
df[df.index_right.isin(sample_parcels.index)].to_crs(epsg=3857).plot(ax=ax, color="green")
ctx.add_basemap(ax, source=ctx.providers.Esri.WorldImagery)
plt.title("Red = parcel centroid, green = QPID latlong")
<string>:2: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.
Text(0.5, 1.0, 'Red = parcel centroid, green = QPID latlong')
d. vector of longitudes of the vertices of the matched LINZ parcels LINZ_parcel_vertices_lon
e. vector of latitudes of the vertices of the matched LINZ parcels LINZ_parcel_vertices_lat
pd.Series(type(a) for a in df.parcel_geometry).value_counts()
<class 'shapely.geometry.multipolygon.MultiPolygon'> 376094 <class 'NoneType'> 7178 dtype: int64
%%time
def extract_verts(geometry):
lat = np.nan
lng = np.nan
if geometry:
coordinates = []
for polygon in geometry:
coordinates.extend(polygon.exterior.coords[:-1]) # Drop last point, as it's just the first point again
lng = f"[{'; '.join([str(round(point[0], 6)) for point in coordinates])}]"
lat = f"[{'; '.join([str(round(point[1], 6)) for point in coordinates])}]"
return pd.Series([lng, lat])
df[["LINZ_parcel_vertices_lon", "LINZ_parcel_vertices_lat"]] = df.parcel_geometry.progress_apply(extract_verts)
<string>:6: ShapelyDeprecationWarning: Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry.
CPU times: user 1min 40s, sys: 2.05 s, total: 1min 42s Wall time: 1min 41s
f. subvector of longitudes of parcel that sits adjacent to a road LINZ_parcel_roadvertices_lon
g. subvector of latitudes of parcel that sits adjacent to a road LINZ_parcel_roadvertices_lat
Let's start with a simple test of one row.
sample = df.iloc[0]
sample
/usr/local/lib/python3.8/dist-packages/pandas/core/dtypes/inference.py:383: ShapelyDeprecationWarning: Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry. iter(obj) # Can iterate over it. /usr/local/lib/python3.8/dist-packages/pandas/core/dtypes/inference.py:384: ShapelyDeprecationWarning: __len__ for multi-part geometries is deprecated and will be removed in Shapely 2.0. Check the length of the `geoms` property instead to get the number of parts of a multi-part geometry. len(obj) # Has a length associated with it. /usr/local/lib/python3.8/dist-packages/pandas/io/formats/printing.py:118: ShapelyDeprecationWarning: Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry. s = iter(seq) /usr/local/lib/python3.8/dist-packages/pandas/io/formats/printing.py:122: ShapelyDeprecationWarning: __len__ for multi-part geometries is deprecated and will be removed in Shapely 2.0. Check the length of the `geoms` property instead to get the number of parts of a multi-part geometry. for i in range(min(nitems, len(seq))) /usr/local/lib/python3.8/dist-packages/pandas/io/formats/printing.py:126: ShapelyDeprecationWarning: __len__ for multi-part geometries is deprecated and will be removed in Shapely 2.0. Check the length of the `geoms` property instead to get the number of parts of a multi-part geometry. if nitems < len(seq):
CL_Longitude 174.588941 CL_Latitude -36.186076 QPID_vintage 2020 geometry POINT (174.588940530675 -36.1860763144825) index_right 5128312 appellation Part Allot S25 PSH OF Arai affected_surveys None parcel_intent DCDB topology_type Primary statutory_actions None land_district North Auckland LINZ_parcel_ID NA589/73 survey_area NaN calc_area 2913.0 parcel_geometry (POLYGON ((174.5887711830001 -36.1863244329999... LINZ_parcel_centroid_lon 174.589185 LINZ_parcel_centroid_lat -36.186039 LINZ_parcel_vertices_lon [174.588771; 174.588883; 174.589331; 174.58955... LINZ_parcel_vertices_lat [-36.186324; -36.185872; -36.18579; -36.186183... Name: 75494, dtype: object
%%time
xmin, ymin, xmax, ymax = sample.parcel_geometry.bounds
search_area = parcels.cx[xmin:xmax, ymin:ymax]
neighbours = search_area[search_area.touches(sample.parcel_geometry)]
neighbours
CPU times: user 13.5 s, sys: 621 ms, total: 14.1 s Wall time: 14.3 s
| appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | geometry | parcel_geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| id | |||||||||||
| 5207551 | None | None | Road | Primary | None | North Auckland | None | NaN | 8063.0 | MULTIPOLYGON (((174.58888 -36.18587, 174.58877... | MULTIPOLYGON (((174.58888 -36.18587, 174.58877... |
| 5224880 | None | None | Road | Primary | None | North Auckland | None | NaN | 10916.0 | MULTIPOLYGON (((174.58967 -36.18646, 174.58967... | MULTIPOLYGON (((174.58967 -36.18646, 174.58967... |
| 4941664 | Lot 2 DP 130126 | DP 130126 | DCDB | Primary | None | North Auckland | NA131B/147 | 21810.0 | 21780.0 | MULTIPOLYGON (((174.59156 -36.18479, 174.59187... | MULTIPOLYGON (((174.59156 -36.18479, 174.59187... |
| 6744495 | Lot 43 DP 340819 | DP 340819, DP 477278, LT 409850 | Fee Simple Title | Primary | None | North Auckland | 661091 | 1529.0 | 1528.0 | MULTIPOLYGON (((174.58907 -36.18508, 174.58920... | MULTIPOLYGON (((174.58907 -36.18508, 174.58920... |
| 7558496 | Lot 16 DP 477278 | DP 477278, LT 494228 | Fee Simple Title | Primary | None | North Auckland | 661091 | 6043.0 | 6041.0 | MULTIPOLYGON (((174.58872 -36.18652, 174.58877... | MULTIPOLYGON (((174.58872 -36.18652, 174.58877... |
search_area.parcel_intent[sample.index_right] = "AOI"
cmap = matplotlib.colors.ListedColormap(['red', 'blue', 'green', 'pink'])
ax = search_area.to_crs(epsg=3857).plot(column="parcel_intent", cmap=cmap, legend=True, categorical=True, alpha=.5)
roads = neighbours[neighbours.parcel_intent == "Road"]
intersection = roads.geometry.intersection(sample.parcel_geometry)
intersection.to_crs(epsg=3857).plot(ax=ax, color="red", linewidth=5)
ctx.add_basemap(ax, url=ctx.providers.Esri.WorldImagery)
<string>:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy <string>:7: FutureWarning: The "url" option is deprecated. Please use the "source" argument instead.
%%time
roads = parcels[parcels.parcel_intent == "Road"]
CPU times: user 13.2 s, sys: 621 ms, total: 13.8 s Wall time: 13.9 s
%%time
if os.path.isfile("restricted/road_intersections.pkl"):
road_intersections = pd.read_pickle("restricted/road_intersections.pkl")
else:
# Computationally intensive - with 16 CPUs - Wall time: 1h 23min 27s
def extract_road_intersections(geom):
if geom:
xmin, ymin, xmax, ymax = geom.bounds
neighbouring_roads = roads.cx[xmin:xmax, ymin:ymax]
return neighbouring_roads.geometry.intersection(geom).unary_union
return np.nan
road_intersections = df.parcel_geometry.parallel_apply(extract_road_intersections)
road_intersections.to_pickle("restricted/road_intersections.pkl")
CPU times: user 9.08 s, sys: 320 ms, total: 9.4 s Wall time: 9.47 s
types = pd.Series(type(i) for i in road_intersections)
types.index = road_intersections.index
types.value_counts()
<class 'shapely.geometry.linestring.LineString'> 199991 <class 'shapely.geometry.multilinestring.MultiLineString'> 141418 <class 'NoneType'> 40163 <class 'shapely.geometry.collection.GeometryCollection'> 1290 <class 'shapely.geometry.point.Point'> 393 <class 'shapely.geometry.multipoint.MultiPoint'> 16 <class 'shapely.geometry.polygon.Polygon'> 1 dtype: int64
display(df[types == Polygon])
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | LINZ_parcel_ID | survey_area | calc_area | parcel_geometry | LINZ_parcel_centroid_lon | LINZ_parcel_centroid_lat | LINZ_parcel_vertices_lon | LINZ_parcel_vertices_lat | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||||||
| 258637 | 174.84187 | -36.903922 | 2020 | POINT (174.84187 -36.90392) | 7515185 | Section 1 SO 471988 | SO 471988, SO 509515 | Glitch? | Primary | [Create] Land declared Road New Zealand Gazett... | North Auckland | None | 690.0 | 688.0 | MULTIPOLYGON (((174.84229 -36.90402, 174.84226... | 174.841943 | -36.903956 | [174.842286; 174.842257; 174.84188; 174.841667... | [-36.904017; -36.904065; -36.904015; -36.90396... |
geom = df.parcel_geometry[258637]
xmin, ymin, xmax, ymax = geom.bounds
neighbouring_roads = roads.cx[xmin:xmax, ymin:ymax]
neighbouring_roads = neighbouring_roads[neighbouring_roads.touches(geom)]
road_intersections[258637] = neighbouring_roads.geometry.intersection(geom).unary_union
types[258637] = type(road_intersections[258637])
types.value_counts()
<class 'shapely.geometry.linestring.LineString'> 199991 <class 'shapely.geometry.multilinestring.MultiLineString'> 141419 <class 'NoneType'> 40163 <class 'shapely.geometry.collection.GeometryCollection'> 1290 <class 'shapely.geometry.point.Point'> 393 <class 'shapely.geometry.multipoint.MultiPoint'> 16 dtype: int64
collections = road_intersections[types == GeometryCollection]
collection_types = []
for c in collections:
collection_types.extend(type(o) for o in c)
pd.Series(collection_types).value_counts()
<string>:3: ShapelyDeprecationWarning: Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry.
<class 'shapely.geometry.linestring.LineString'> 1245 <class 'shapely.geometry.point.Point'> 572 <class 'shapely.geometry.polygon.Polygon'> 2 dtype: int64
for i, c in collections.iteritems():
if Polygon in [type(o) for o in c]:
print(i)
2211389 2232849
<string>:2: ShapelyDeprecationWarning: Iteration over multi-part geometries is deprecated and will be removed in Shapely 2.0. Use the `geoms` property to access the constituent parts of a multi-part geometry.
problems = df.loc[[2211389, 2232849]]
display(problems)
xmin, ymin, xmax, ymax = problems.parcel_geometry.unary_union.bounds
search_area = parcels.cx[xmin:xmax, ymin:ymax]
search_area.parcel_intent[problems.index_right] = "AOI"
cmap = matplotlib.colors.ListedColormap(['red', 'blue', 'green', 'pink'])
ax = search_area.to_crs(epsg=3857).plot(column="parcel_intent", cmap=cmap, legend=True, categorical=True, alpha=.5, edgecolor="black")
roads = search_area[search_area.parcel_intent == "Road"]
intersection = roads.geometry.intersection(problems.parcel_geometry.unary_union)
intersection.to_crs(epsg=3857).plot(ax=ax, color="red", linewidth=5)
intersection.boundary.to_crs(epsg=3857).plot(ax=ax, color="blue", linewidth=5)
display(intersection)
ctx.add_basemap(ax, source=ctx.providers.Esri.WorldImagery)
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | LINZ_parcel_ID | survey_area | calc_area | parcel_geometry | LINZ_parcel_centroid_lon | LINZ_parcel_centroid_lat | LINZ_parcel_vertices_lon | LINZ_parcel_vertices_lat | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||||||
| 2211389 | 174.690915 | -36.764765 | 2020 | POINT (174.69092 -36.76476) | 6605986 | Lot 109 DP 310598 | DP 310598 | Fee Simple Title | Primary | None | North Auckland | 41583 | 603.0 | 603.0 | MULTIPOLYGON (((174.69072 -36.76480, 174.69098... | 174.690924 | -36.764765 | [174.69072; 174.690984; 174.691061; 174.691117... | [-36.764801; -36.764606; -36.764662; -36.76473... |
| 2232849 | 174.690710 | -36.764933 | 2020 | POINT (174.69071 -36.76493) | 6616860 | Lot 68 DP 312607 | DP 312607 | Fee Simple Title | Primary | None | North Auckland | 49611 | 502.0 | 502.0 | MULTIPOLYGON (((174.69053 -36.76494, 174.69072... | 174.690721 | -36.764940 | [174.69053; 174.69072; 174.690863; 174.690907;... | [-36.764943; -36.764801; -36.764926; -36.76496... |
<string>:7: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy /usr/local/lib/python3.8/dist-packages/pandas/core/series.py:1135: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy self._set_values(indexer, value)
id 6616875 POLYGON ((174.69072 -36.76480, 174.69053 -36.7... 6606001 MULTILINESTRING ((174.69112 -36.76474, 174.691... 6616867 MULTILINESTRING ((174.69053 -36.76494, 174.690... 6606004 POLYGON ((174.69072 -36.76480, 174.69072 -36.7... dtype: geometry
%%time
LINZ_parcel_roadvertices_lon = []
LINZ_parcel_roadvertices_lat = []
for intersection in tqdm(road_intersections):
lat = np.nan
lon = np.nan
if intersection:
lat = []
lon = []
try:
if type(intersection) in (LineString, Point):
lon.extend(intersection.xy[0])
lat.extend(intersection.xy[1])
elif type(intersection) in (MultiLineString, GeometryCollection, MultiPoint):
for p in intersection.geoms:
if type(p) == Polygon:
lon.extend(p.exterior.xy[0][:-1])
lat.extend(p.exterior.xy[1][:-1])
else:
lon.extend(p.xy[0])
lat.extend(p.xy[1])
else:
raise Exception(f"Don't know how to handle {type(intersection)}")
except:
print(type(intersection))
print(intersection)
raise
lon = f"[{'; '.join([str(round(lon, 6)) for lon in lon])}]"
lat = f"[{'; '.join([str(round(lat, 6)) for lat in lat])}]"
LINZ_parcel_roadvertices_lon.append(lon)
LINZ_parcel_roadvertices_lat.append(lat)
df["LINZ_parcel_roadvertices_lon"] = LINZ_parcel_roadvertices_lon
df["LINZ_parcel_roadvertices_lat"] = LINZ_parcel_roadvertices_lat
CPU times: user 43.8 s, sys: 765 ms, total: 44.6 s Wall time: 44.5 s
df
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | index_right | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | ... | LINZ_parcel_ID | survey_area | calc_area | parcel_geometry | LINZ_parcel_centroid_lon | LINZ_parcel_centroid_lat | LINZ_parcel_vertices_lon | LINZ_parcel_vertices_lat | LINZ_parcel_roadvertices_lon | LINZ_parcel_roadvertices_lat | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||||||||
| 75494 | 174.588941 | -36.186076 | 2020 | POINT (174.58894 -36.18608) | 5128312 | Part Allot S25 PSH OF Arai | None | DCDB | Primary | None | ... | NA589/73 | NaN | 2913.0 | MULTIPOLYGON (((174.58877 -36.18632, 174.58888... | 174.589185 | -36.186039 | [174.588771; 174.588883; 174.589331; 174.58955... | [-36.186324; -36.185872; -36.18579; -36.186183... | [174.588883; 174.588771; 174.589551; 174.58933... | [-36.185872; -36.186324; -36.186183; -36.18579... |
| 75499 | 174.581811 | -36.200345 | 2020 | POINT (174.58181 -36.20034) | 5128309 | Lot 2 DP 130303 | DP 130303 | DCDB | Primary | None | ... | NA76B/662 | 9859.0 | 8620.0 | MULTIPOLYGON (((174.58154 -36.20156, 174.58161... | 174.581884 | -36.200845 | [174.581541; 174.581606; 174.581586; 174.58184... | [-36.201565; -36.201229; -36.201017; -36.19972... | [174.581841; 174.581586; 174.581586; 174.58160... | [-36.199726; -36.201017; -36.201017; -36.20122... |
| 75639 | 174.496590 | -36.228740 | 2020 | POINT (174.49659 -36.22874) | 4823770 | Part Otioro & Te Topuni A2A Block | ML 9928 | DCDB | Primary | None | ... | NA1373/48 | 8296.0 | 7490.0 | MULTIPOLYGON (((174.49514 -36.22889, 174.49701... | 174.496751 | -36.228824 | [174.495137; 174.497012; 174.497667; 174.49766... | [-36.228888; -36.228552; -36.228639; -36.22893... | [174.497667; 174.497012; 174.497012; 174.495137] | [-36.228639; -36.228552; -36.228552; -36.228888] |
| 75640 | 174.498361 | -36.228827 | 2020 | POINT (174.49836 -36.22883) | 4697657 | Lot 1 DP 44316 | DP 44316 | DCDB | Primary | None | ... | NA1373/47 | 3667.0 | 4152.0 | MULTIPOLYGON (((174.49863 -36.22877, 174.49862... | 174.498193 | -36.228933 | [174.498626; 174.498615; 174.498386; 174.49766... | [-36.228766; -36.229337; -36.22924; -36.228936... | [174.498626; 174.497667] | [-36.228766; -36.228639] |
| 75654 | 174.493736 | -36.229990 | 2020 | POINT (174.49374 -36.22999) | 5202126 | Lot 1 DP 52926 | DP 52926 | DCDB | Primary | None | ... | NA8B/226 | 3331.0 | 3323.0 | MULTIPOLYGON (((174.49427 -36.23026, 174.49386... | 174.494172 | -36.230010 | [174.49427; 174.493857; 174.493618; 174.493729... | [-36.230265; -36.230116; -36.230056; -36.22977... | [174.494901; 174.493729] | [-36.229975; -36.229771] |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3160123 | 174.728112 | -36.408104 | 2018 | POINT (174.72811 -36.40810) | 7974171 | Lot 82 DP 521864 | DP 521864 | Fee Simple Title | Primary | None | ... | 826394 | 39.0 | 40.0 | MULTIPOLYGON (((174.72813 -36.40815, 174.72812... | 174.728112 | -36.408104 | [174.728128; 174.728123; 174.728076; 174.72808... | [-36.408145; -36.408144; -36.408135; -36.40810... | NaN | NaN |
| 3160124 | 174.728164 | -36.408113 | 2018 | POINT (174.72816 -36.40811) | 7974172 | Lot 83 DP 521864 | DP 521864 | Fee Simple Title | Primary | None | ... | 826395 | 39.0 | 40.0 | MULTIPOLYGON (((174.72813 -36.40815, 174.72814... | 174.728164 | -36.408113 | [174.728128; 174.728137; 174.728139; 174.72814... | [-36.408145; -36.408111; -36.408106; -36.40807... | NaN | NaN |
| 3160630 | 174.655992 | -36.509925 | 2018 | POINT (174.65599 -36.50992) | 7440747 | Lot 2 DP 460961 | DP 460961 | Fee Simple Title | Primary | None | ... | 605470 | 4500.0 | 4495.0 | MULTIPOLYGON (((174.65635 -36.51025, 174.65624... | 174.655985 | -36.510014 | [174.656354; 174.656238; 174.656031; 174.65582... | [-36.510252; -36.510457; -36.510421; -36.51039... | [174.656154; 174.656017; 174.656017; 174.65593... | [-36.509474; -36.50944; -36.50944; -36.509423;... |
| 3164546 | 174.537978 | -36.774834 | 2018 | POINT (174.53798 -36.77483) | 7987560 | Lot 1 DP 533552 | DP 533552 | Fee Simple Title | Primary | None | ... | 878889 | 62163.0 | 62143.0 | MULTIPOLYGON (((174.53978 -36.77485, 174.53974... | 174.538969 | -36.773813 | [174.539778; 174.539738; 174.539702; 174.53967... | [-36.774848; -36.774902; -36.774953; -36.77500... | [174.539778; 174.539832; 174.539832; 174.53988... | [-36.774848; -36.774768; -36.774768; -36.77468... |
| 3166852 | 174.644137 | -36.854796 | 2018 | POINT (174.64414 -36.85480) | 7799795 | Section 36 SO 498829 | SO 498829 | Fee Simple Title | Primary | [Create] Fee Simple Title New Zealand Gazette ... | ... | 900731 | 1460.0 | 1459.0 | MULTIPOLYGON (((174.64432 -36.85494, 174.64428... | 174.644048 | -36.854825 | [174.64432; 174.644277; 174.644156; 174.643992... | [-36.854943; -36.854953; -36.854981; -36.85501... | [174.643415; 174.643431] | [-36.854967; -36.854998] |
383272 rows × 21 columns
h. AUP Zone Code of adjoining parcels (this includes residential, business, and rural zones; it should also include roads, water and open spaces) LINZ_parcel_sides_zones
%%time
zones = gpd.read_file("input/MASTER_UP_BaseZone_SHP.zip")
zones = zones.to_crs(parcels.crs)
zones
CPU times: user 29.3 s, sys: 2.41 s, total: 31.7 s Wall time: 31.9 s
| OBJECTID | CONTOUR | created_da | DocumentUR | GlobalID | GROUPZONE | GROUPZONE_ | last_edite | NAME | PARCEL_BAS | ... | TYPE | TYPE_resol | VERSIONSTA | VERSIONS_1 | ZONE | ZONE_resol | ZONEHEIGHT | SHAPE_Leng | SHAPE_Area | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.0 | None | 20160718211 | None | {4C8F9436-7EA6-417E-B64F-15FCD44459F6} | 2 | Residential | 20161111010 | None | None | ... | None | None | 4 | Operative | 60 | Residential - Mixed Housing Urban Zone | NaN | 285.664016 | 2.050275e+03 | POLYGON ((174.88890 -37.02031, 174.88893 -37.0... |
| 1 | 2.0 | None | 20160718211 | None | {604AAD87-8ED4-4111-8276-47CEE7E81F92} | 1 | Public Open Space | 20161111010 | None | None | ... | None | None | 4 | Operative | 33 | Open Space - Sport and Active Recreation Zone | NaN | 1246.837757 | 1.684599e+04 | POLYGON ((174.84254 -36.85175, 174.84198 -36.8... |
| 2 | 3.0 | None | 20160718211 | None | {8D827DA8-BC5B-437A-B17A-532354F7D037} | 4 | Rural | 20161111010 | None | None | ... | None | None | 4 | Operative | 11 | Rural - Mixed Rural Zone | NaN | 3582.113246 | 6.841744e+05 | POLYGON ((174.56993 -36.78067, 174.56991 -36.7... |
| 3 | 4.0 | None | 20160718211 | None | {96C9E266-3341-4C71-94F1-325F2EE45732} | 2 | Residential | 20161111010 | None | None | ... | None | None | 4 | Operative | 23 | Residential - Large Lot Zone | NaN | 317.098469 | 6.024226e+03 | POLYGON ((174.68648 -36.77575, 174.68596 -36.7... |
| 4 | 5.0 | None | 20160718211 | None | {90B50FEE-45A3-4E88-819A-370751ACDE3D} | 1 | Public Open Space | 20161111010 | None | None | ... | None | None | 4 | Operative | 31 | Open Space - Conservation Zone | NaN | 230.836636 | 5.639794e+02 | POLYGON ((174.58715 -36.87164, 174.58733 -36.8... |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 130295 | 130296.0 | None | 20161115151 | None | {B2F0FB45-80F6-41ED-AA57-A914B195B31E} | 1 | Public Open Space | 20161115151 | None | None | ... | None | None | 4 | Operative | 31 | Open Space - Conservation Zone | NaN | 179382.307787 | 9.040193e+07 | POLYGON ((174.55200 -37.01270, 174.55225 -37.0... |
| 130296 | 130297.0 | None | 20161115151 | None | {A3F4EF61-9162-43C3-90BD-DC84D93C6A64} | 2 | Residential | 20161115151 | None | None | ... | None | None | 4 | Operative | 18 | Residential - Mixed Housing Suburban Zone | NaN | 2769.292040 | 2.920602e+05 | POLYGON ((174.91212 -36.97863, 174.91223 -36.9... |
| 130297 | 130298.0 | None | 20161115151 | None | {A5FC7EB5-76E5-414C-96DD-715C671764B4} | 4 | Rural | 20161115151 | Kaipara South Head and Harbour coastal area | None | ... | None | None | 4 | Operative | 46 | Rural - Rural Coastal Zone | NaN | 27168.762393 | 6.344938e+06 | POLYGON ((174.45505 -36.53462, 174.45506 -36.5... |
| 130298 | 130299.0 | None | 20161115151 | None | {C371B83C-35CE-46A1-B94F-F1E592F271F7} | 2 | Residential | 20161115151 | None | None | ... | None | None | 4 | Operative | 8 | Residential - Terrace Housing and Apartment Bu... | NaN | 1812.109533 | 1.536829e+05 | POLYGON ((174.60367 -36.81788, 174.60326 -36.8... |
| 130299 | 130300.0 | None | 20161115151 | None | {31281924-C74D-4038-8260-FE6D886F4C94} | 2 | Residential | 20161115151 | None | None | ... | None | None | 4 | Operative | 18 | Residential - Mixed Housing Suburban Zone | NaN | 2511.267945 | 3.539606e+05 | POLYGON ((174.67000 -36.80217, 174.67007 -36.8... |
130300 rows × 31 columns
%%time
df.geometry = df.parcel_geometry
df = df.drop(columns="index_right")
df.sindex
parcels.sindex
df.has_sindex, parcels.has_sindex
CPU times: user 21 s, sys: 1.58 s, total: 22.6 s Wall time: 22.9 s
(True, True)
%%time
touches = gpd.sjoin(df, parcels, op="touches").parcel_geometry_right
CPU times: user 12min 48s, sys: 4.19 s, total: 12min 52s Wall time: 12min 53s
touches
CL_QPID_output2
75494 MULTIPOLYGON (((174.58967 -36.18646, 174.58967...
75494 MULTIPOLYGON (((174.58888 -36.18587, 174.58877...
75494 MULTIPOLYGON (((174.58872 -36.18652, 174.58877...
75494 MULTIPOLYGON (((174.59156 -36.18479, 174.59187...
75494 MULTIPOLYGON (((174.58907 -36.18508, 174.58920...
...
3160630 MULTIPOLYGON (((174.65635 -36.51025, 174.65623...
3164546 MULTIPOLYGON (((174.53767 -36.77386, 174.53780...
3164546 MULTIPOLYGON (((174.53752 -36.77150, 174.53816...
3164546 MULTIPOLYGON (((174.53426 -36.77049, 174.53367...
3166852 MULTIPOLYGON (((174.64341 -36.85497, 174.64335...
Name: parcel_geometry_right, Length: 2076816, dtype: geometry
sample = df.loc[[75494]]
ax = sample.to_crs(epsg=3857).plot(color="red", alpha=.5, edgecolor="black")
touches[[75494]].to_crs(epsg=3857).plot(color="blue", alpha=.5, edgecolor="black", ax=ax)
ctx.add_basemap(ax, source=ctx.providers.Esri.WorldImagery)
touches_df = gpd.GeoDataFrame(geometry=touches.representative_point(), crs=df.crs)
print(len(touches_df), len(zones))
2076816 130300
%%time
touched_zones = gpd.sjoin(touches_df, zones, op="within")
CPU times: user 33.1 s, sys: 2.35 s, total: 35.5 s Wall time: 35.8 s
df["LINZ_parcel_sides_zones"] = touched_zones.groupby("CL_QPID_output2").ZONE.apply(list)
df["LINZ_parcel_sides_zones"]
CL_QPID_output2
75494 [27, 27, 16, 16, 16]
75499 [16, 16, 27, 27, 27]
75639 [16, 16, 16, 27]
75640 [16, 16, 16, 27]
75654 [16, 16, 26]
...
3160123 [46, 18]
3160124 [46, 18]
3160630 [27, 20, 20, 20, 20, 20]
3164546 [27, 18, 18, 18, 26, 26, 26]
3166852 [19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 27]
Name: LINZ_parcel_sides_zones, Length: 383272, dtype: object
df["LINZ_parcel_sides_zones"].str.len().value_counts()
5.0 110872 4.0 90033 6.0 71883 7.0 34022 3.0 28509 8.0 15928 9.0 8615 10.0 4710 11.0 2777 12.0 1787 13.0 1288 14.0 1052 2.0 970 15.0 696 16.0 609 17.0 581 18.0 358 19.0 266 29.0 249 21.0 232 24.0 136 20.0 116 22.0 61 1.0 51 28.0 49 40.0 41 37.0 34 30.0 30 23.0 29 58.0 21 25.0 21 26.0 17 36.0 15 46.0 11 31.0 8 44.0 6 27.0 3 54.0 2 38.0 1 59.0 1 35.0 1 61.0 1 41.0 1 53.0 1 Name: LINZ_parcel_sides_zones, dtype: int64
zones[["ZONE", "ZONE_resol"]].value_counts()
ZONE ZONE_resol 27 Road 47012 30 Coastal - General Coastal Marine Zone 26326 59 Coastal - Coastal Transition Zone 15703 18 Residential - Mixed Housing Suburban Zone 9775 60 Residential - Mixed Housing Urban Zone 5864 26 Strategic Transport Corridor Zone 4215 19 Residential - Single House Zone 3766 31 Open Space - Conservation Zone 3063 32 Open Space - Informal Recreation Zone 2926 8 Residential - Terrace Housing and Apartment Building Zone 2134 16 Rural - Rural Production Zone 1025 25 Water 1013 12 Business - Mixed Use Zone 782 46 Rural - Rural Coastal Zone 768 17 Business - Light Industry Zone 593 44 Business - Neighbourhood Centre Zone 564 20 Residential - Rural and Coastal Settlement Zone 480 33 Open Space - Sport and Active Recreation Zone 477 22 Business - Town Centre Zone 432 23 Residential - Large Lot Zone 379 11 Rural - Mixed Rural Zone 344 3 Rural - Countryside Living Zone 326 4 Future Urban Zone 282 7 Business - Local Centre Zone 266 43 Hauraki Gulf Islands 221 35 Business - City Centre Zone 191 34 Open Space - Community Zone 183 69 Rural - Waitakere Ranges Zone 172 10 Business - Metropolitan Centre Zone 160 5 Business - Heavy Industry Zone 130 63 Special Purpose - School Zone 126 41 Coastal - Mooring Zone 124 49 Business - General Business Zone 103 15 Rural - Rural Conservation Zone 71 68 Rural - Waitakere Foothills Zone 44 53 Special Purpose - Cemetery Zone 41 51 Special Purpose - Quarry Zone 34 52 Special Purpose - Maori Purpose Zone 32 54 Special Purpose - Major Recreation Facility Zone 28 55 Special Purpose - Healthcare Facility and Hospital Zone 23 45 Coastal - Ferry Terminal Zone 20 40 Coastal - Marina Zone 20 62 Open Space - Civic Spaces Zone 18 1 Business - Business Park Zone 16 56 Special Purpose - Airports and Airfields Zone 9 61 Green Infrastructure Corridor 6 64 Special Purpose - Tertiary Education Zone 4 37 Coastal - Minor Port Zone 4 39 Coastal - Defence Zone 3 dtype: int64
sample = df[df["LINZ_parcel_sides_zones"].str.len() == 1].iloc[[0]]
sample
| CL_Longitude | CL_Latitude | QPID_vintage | geometry | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | ... | survey_area | calc_area | parcel_geometry | LINZ_parcel_centroid_lon | LINZ_parcel_centroid_lat | LINZ_parcel_vertices_lon | LINZ_parcel_vertices_lat | LINZ_parcel_roadvertices_lon | LINZ_parcel_roadvertices_lat | LINZ_parcel_sides_zones | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| CL_QPID_output2 | |||||||||||||||||||||
| 83639 | 174.432209 | -36.43788 | 2020 | MULTIPOLYGON (((174.43214 -36.43812, 174.43212... | Section 29 Block X Tauhoa SD | SO 57581 | DCDB | Primary | None | North Auckland | ... | 1961.0 | 1958.0 | MULTIPOLYGON (((174.43214 -36.43812, 174.43212... | 174.432304 | -36.437643 | [174.432142; 174.432116; 174.432159; 174.43261... | [-36.438119; -36.437691; -36.43738; -36.437394... | [174.432116; 174.432142; 174.432142; 174.43254... | [-36.437691; -36.438119; -36.438119; -36.43757... | [26] |
1 rows × 21 columns
ax = sample.to_crs(epsg=3857).plot(color="red", alpha=.5, edgecolor="black")
touches[[83639]].to_crs(epsg=3857).plot(color="blue", alpha=.5, edgecolor="black", ax=ax)
ctx.add_basemap(ax, source=ctx.providers.Esri.WorldImagery)
df.keys()
Index(['CL_Longitude', 'CL_Latitude', 'QPID_vintage', 'geometry',
'appellation', 'affected_surveys', 'parcel_intent', 'topology_type',
'statutory_actions', 'land_district', 'LINZ_parcel_ID', 'survey_area',
'calc_area', 'parcel_geometry', 'LINZ_parcel_centroid_lon',
'LINZ_parcel_centroid_lat', 'LINZ_parcel_vertices_lon',
'LINZ_parcel_vertices_lat', 'LINZ_parcel_roadvertices_lon',
'LINZ_parcel_roadvertices_lat', 'LINZ_parcel_sides_zones'],
dtype='object')
df.drop(columns=["geometry", "parcel_geometry"]).to_csv("restricted/QPIDs_Auckland_1990_2020_augmented.csv")